library(ggplot2)
  library(reshape2)
  library(dplyr)
  library(tidyr)
  library(GGally)
  library(grid)
  "%&%" = function(a,b) paste(a,b,sep="")
  se <- function(x) sqrt(var(x,na.rm=TRUE)/length(is.na(x)==FALSE))
  source('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan_CV/GTEx_2014-06013_release/transfers/PrediXmod/Paper_plots/multiplot.R')
  my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/'
  fig.dir <- '~/GitHub/GenArch/GenArchPaper/Figures/'
  rna.dir <- my.dir %&% "gtex-rnaseq/"
  annot.dir <- my.dir %&% "gtex-annot/"
  out.dir <- rna.dir %&% "ind-tissues-RPKM/"
##read in SE calculated from 100 perms - Price et al. method
setable <- read.table(my.dir %&% "SE_estimate_from_h2_localonly_reml-no-constrain_allgenes_100perms.txt",header=T)
dgn <- read.table(my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.Chr1-22_globalAll_reml-no-constrain.2015-12-15.txt',header=T)
tislist <- scan(my.dir %&% 'gtex-rnaseq/ind-tissues-from-nick/GTEx_PrediXmod.tissue.list',sep="\n",what="character")
tisspacelist <- scan(my.dir %&% '40.tissue.list',sep="\n",what="character")
table1 <- matrix(0,nrow=length(tislist)+2,ncol=6)
n <- dgn$N[1]
#numexpgenes <- dim(dgn)[1]
numexpgenes <- length(dgn$local.p[is.na(dgn$local.p)==FALSE]) 
meanh2 <- sprintf("%.3f",mean(dgn$local.h2,na.rm=TRUE))
seperm <- setable$se[1]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <-  dgn %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH"))  %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
numsig <- table(pest$Qlt05)[2]
table1[1,] <- c("DGN Whole Blood",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="DGN")

hist(pest$pchi,main="DGN")

#add cross-tissue to table 1
ct <- read.table(my.dir %&% 'cross-tissue.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T)
n <- ct$N[1]
#numexpgenes<-dim(ct)[1]
numexpgenes <- length(ct$local.p[is.na(ct$local.p)==FALSE]) 
meanh2 <- sprintf("%.3f",mean(ct$local.h2,na.rm=TRUE))
seperm <- setable$se[2]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <-  ct %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
numsig <- table(pest$Qlt05)[2]
table1[2,] <- c("Cross-tissue",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="CT")

hist(pest$pchi,main="CT")

##calc mean global for DGN
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)
## [1] 0.143
signif(se(dgn$loc.jt.h2),3)
## [1] 0.00153
signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3)
## [1] 0.034
signif(se(dgn$glo.jt.h2),3)
## [1] 0.00239
pest <-  dgn %>% mutate(glo.jt.P=pchisq((glo.jt.h2/glo.jt.se)^2, df=1, lower.tail=FALSE)) %>%  mutate(glo.jt.Q=p.adjust(glo.jt.P,method="BH"))   %>% arrange(glo.jt.h2) %>% mutate(Qlt05=glo.jt.Q<0.1) 
propsig <- table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100
propsig
##     TRUE 
## 4.905808
table(pest$Qlt05)
## 
## FALSE  TRUE 
##  9692   500
##prop loc
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)/(signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)+signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3))
## [1] 0.8079096
for(i in 1:length(tislist)){
  tis <- tislist[i]
  data <- read.table(my.dir %&% 'GTEx.TW.' %&% tis  %&% '.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T,sep="\t")  
  explist <- scan(out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist","c")
  data <- dplyr::filter(data,ensid %in% explist)
  n <- data$N[1]
  #numexpgenes <- dim(data)[1]
  numexpgenes <- length(data$local.p[is.na(data$local.p)==FALSE]) ##num expressed genes mean(RPKM)>0.1
  meanh2 <- sprintf("%.3f",mean(data$local.h2,na.rm=TRUE))
  seperm <- setable$se[i+2]
  pest <-  data %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1) 
  propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
  numsig <- table(pest$Qlt05)[2]
  tisspace <- tisspacelist[i]
  meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
  tableinfo <- c(tisspace,n,meanandse,propsig,numsig,numexpgenes)
  table1[i+2,] <- tableinfo
  hist(pest$local.p,main=tis)
  hist(pest$pchi,main=tis)
}

colnames(table1)=c("tissue","n","mean h2 (SE)","% FDR<0.1","num FDR<0.1","num expressed")
#table1

library(xtable)
tab <- xtable(table1)
print(tab, type="latex",include.rownames=FALSE)
## % latex table generated in R 3.2.0 by xtable 1.8-0 package
## % Wed Aug 10 21:13:04 2016
## \begin{table}[ht]
## \centering
## \begin{tabular}{llllll}
##   \hline
## tissue & n & mean h2 (SE) & \% FDR$<$0.1 & num FDR$<$0.1 & num expressed \\ 
##   \hline
## DGN Whole Blood & 922 & 0.149 (0.0039) & 70.6 & 7474 & 10590 \\ 
##   Cross-tissue & 450 & 0.062 (0.017) & 20.2 & 2995 & 14861 \\ 
##   Adipose - Subcutaneous & 298 & 0.038 (0.028) & 24.6 & 2666 & 10837 \\ 
##   Adrenal Gland & 126 & 0.043 (0.018) & 23.9 & 2479 & 10384 \\ 
##   Artery - Aorta & 198 & 0.042 (0.024) & 21.2 & 2385 & 11263 \\ 
##   Artery - Coronary & 119 & 0.037 (0.048) & 22.3 & 2326 & 10427 \\ 
##   Artery - Tibial & 285 & 0.042 (0.02) & 23.9 & 2489 & 10393 \\ 
##   Brain - Anterior cingulate cortex (BA24) & 72 & 0.028 (0.036) & 18.6 & 2235 & 12013 \\ 
##   Brain - Caudate (basal ganglia) & 100 & 0.037 (0.047) & 23.1 & 2521 & 10896 \\ 
##   Brain - Cerebellar Hemisphere & 89 & 0.049 (0.068) & 20.1 & 2294 & 11391 \\ 
##   Brain - Cerebellum & 103 & 0.050 (0.06) & 23.8 & 2646 & 11129 \\ 
##   Brain - Cortex & 96 & 0.045 (0.057) & 24.4 & 2593 & 10636 \\ 
##   Brain - Frontal Cortex (BA9) & 92 & 0.038 (0.077) & 21.8 & 2416 & 11070 \\ 
##   Brain - Hippocampus & 81 & 0.037 (0.05) & 21.4 & 2376 & 11120 \\ 
##   Brain - Hypothalamus & 81 & 0.017 (0.043) & 18.7 & 2238 & 11999 \\ 
##   Brain - Nucleus accumbens (basal ganglia) & 93 & 0.029 (0.04) & 22.6 & 2498 & 11038 \\ 
##   Brain - Putamen (basal ganglia) & 82 & 0.032 (0.064) & 22.8 & 2495 & 10958 \\ 
##   Breast - Mammary Tissue & 183 & 0.029 (0.037) & 22.9 & 2496 & 10894 \\ 
##   Cells - EBV-transformed lymphocytes & 115 & 0.058 (0.1) & 23.5 & 2066 & 8774 \\ 
##   Cells - Transformed fibroblasts & 272 & 0.051 (0.031) & 26.5 & 2552 & 9615 \\ 
##   Colon - Sigmoid & 124 & 0.033 (0.019) & 19.9 & 2298 & 11534 \\ 
##   Colon - Transverse & 170 & 0.036 (0.058) & 21.5 & 2398 & 11152 \\ 
##   Esophagus - Gastroesophageal Junction & 127 & 0.032 (0.039) & 20.9 & 2270 & 10854 \\ 
##   Esophagus - Mucosa & 242 & 0.042 (0.03) & 20.8 & 2432 & 11703 \\ 
##   Esophagus - Muscularis & 219 & 0.039 (0.015) & 21.7 & 2493 & 11499 \\ 
##   Heart - Atrial Appendage & 159 & 0.042 (0.03) & 22.3 & 2327 & 10447 \\ 
##   Heart - Left Ventricle & 190 & 0.034 (0.034) & 20.5 & 2203 & 10742 \\ 
##   Liver & 98 & 0.033 (0.062) & 21.7 & 2230 & 10295 \\ 
##   Lung & 279 & 0.032 (0.034) & 21.4 & 2509 & 11719 \\ 
##   Muscle - Skeletal & 361 & 0.033 (0.03) & 23.4 & 2301 & 9829 \\ 
##   Nerve - Tibial & 256 & 0.052 (0.087) & 27.1 & 2992 & 11048 \\ 
##   Ovary & 85 & 0.037 (0.094) & 25.0 & 2418 & 9690 \\ 
##   Pancreas & 150 & 0.047 (0.024) & 22.4 & 2398 & 10697 \\ 
##   Pituitary & 87 & 0.038 (0.055) & 21.4 & 2527 & 11828 \\ 
##   Skin - Not Sun Exposed (Suprapubic) & 196 & 0.041 (0.034) & 22.9 & 2490 & 10888 \\ 
##   Skin - Sun Exposed (Lower leg) & 303 & 0.039 (0.016) & 23.5 & 2687 & 11442 \\ 
##   Small Intestine - Terminal Ileum & 77 & 0.036 (0.053) & 24.1 & 2591 & 10771 \\ 
##   Spleen & 89 & 0.059 (0.061) & 24.1 & 2451 & 10170 \\ 
##   Stomach & 171 & 0.032 (0.025) & 21.4 & 2338 & 10903 \\ 
##   Testis & 157 & 0.054 (0.044) & 22.0 & 3009 & 13703 \\ 
##   Thyroid & 279 & 0.044 (0.066) & 24.7 & 2838 & 11487 \\ 
##   Whole Blood & 339 & 0.033 (0.023) & 25.4 & 2260 & 8915 \\ 
##    \hline
## \end{tabular}
## \end{table}

Calculate num expressed genes and make lists per tissue for filtering

tislist <- scan(my.dir %&% 'tissue.list',sep="\n",what="character")

expidlist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.ID.list","character")
expgenelist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENE.list","character")
exp <- readRDS(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENExID.RDS")
expdata <- matrix(exp, ncol=length(expidlist), byrow=T)
t.expdata <- t(expdata)
rownames(t.expdata) <- expidlist
colnames(t.expdata) <- expgenelist

gencodefile <- annot.dir %&% "gencode.v18.genes.patched_contigs.summary.protein"
gencode <- read.table(gencodefile)
rownames(gencode) <- gencode[,5]
t.expdata <- t.expdata[,intersect(colnames(t.expdata),rownames(gencode))] ###pull protein coding gene expression data

sam <- read.table(annot.dir %&% "GTEx_Analysis_2014-06-13.SampleTissue.annot",header=T,sep="\t")

for(i in 1:length(tislist)){
  tissue <- tislist[i]
  tis<- gsub(' ','',tissue) ##removes all whitespace to match .RDS files
  sample <- subset(sam,SMTSD == tissue) ### pull sample list of chosen tissue
  tissue.exp <- t.expdata[intersect(rownames(t.expdata),sample$SAMPID),] ###pull expression data for chosen tissue###
  tissue.exp <- t(tissue.exp) #for merging in R
  
  explist <- subset(rowMeans(tissue.exp), rowMeans(tissue.exp)>0.1) ###pull genes with mean expression > 0.1###
  explist <- names(explist)
  nz.expdata <- tissue.exp[explist,]

  #calc 10% of sample size
  tenpercent <- round(0.1*dim(nz.expdata)[2])
  
  rowtable<-function(x) table(x>0)[[1]] > 2 ##function to determine if >2 samples have exp levels >0
  nonbin<-apply(nz.expdata,1,rowtable) ##apply to matrix
  gt2.expdata <- nz.expdata[nonbin,] ##remove genes with >10% of RPKM's==0
  
  write.table(rownames(gt2.expdata),file=out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist",quote=FALSE,col.names = FALSE,row.names=FALSE)
  cat(tis,":",dim(gt2.expdata)[1],"genes\n")
}